function [J, grad] = costFunction(theta, X, y)
    % COSTFUNCTION Compute logistic-regression cost and gradient.
    %   theta - parameter vector (n x 1)
    %   X     - design matrix (m x n), one example per row
    %   y     - label vector (m x 1) of 0/1 values
    %   J     - cross-entropy cost over the m examples
    %   grad  - gradient of J with respect to theta (n x 1)

    m = length(y);            % number of training examples
    h = sigmoid(X * theta);   % hypothesis h_theta(x) for every example

    % Vectorized cross-entropy cost:
    %   J = (1/m) * sum( -y.*log(h) - (1-y).*log(1-h) )
    J = (1 / m) * sum(-y .* log(h) - (1 - y) .* log(1 - h));

    % Gradient of the cost: (1/m) * X' * (h - y)
    grad = (1 / m) * (X' * (h - y));
end
